Compare kmt5b and hdlbpa mutant RNAseq data from 6dpf zebrafish heads.
library("tidyverse")
library("DESeq2") #finding differentially expressed genes
library("cowplot") #arranging plots into grids
library("viridis") #viridis color schemes
library("scales") #use to get color schemes for viridis
library("ggrepel") #repels text labels on plots
library("RColorBrewer") #pick colors
library("DT") #interactive and searchable tables of our GSEA results
library("GSEABase") #functions and methods for Gene Set Enrichment Analysis
library("Biobase") #base functions for bioconductor; required by GSEABase
library("GSVA") #Gene Set Variation Analysis, a non-parametric and unsupervised method for estimating variation of gene set enrichment across samples.
library("gprofiler2") #tools for accessing the GO enrichment results using g:Profiler web resources
library("clusterProfiler") # provides a suite of tools for functional enrichment analysis
library("msigdbr") # access to msigdb collections directly within R
library("enrichplot") # great for making the standard GSEA enrichment plots
library("ontologyIndex") #for parsing obo files
library("BaseSet") #for importing gaf file
library("plotly") #make interactive plots
library("orthogene") #convert gene names between species
library("ChIPseeker") #for annotating and browsing chip-seq data
library("TxDb.Hsapiens.UCSC.hg38.knownGene", pos = .Machine$integer.max) #human gene annotation for chip-seq data
library("EnsDb.Hsapiens.v75", pos = .Machine$integer.max) #human gene annotation for chip-seq data
library("org.Dr.eg.db") #convert ensembl gene names to gene id
library("lattice") #used for making manhattan plot
library("ggpubr") #calculating correlation coefficient
library("colorspace") #colors for heatmap
library("ggraph") #library for making network graphs
library("svglite") #export svgs in loop
library("eulerr") #make venn diagrams#import deseq output for kmt5b data
kmt5b <- read.csv("kmt5b-homvswt3x3_allresults_wt_hom-with-normalized.csv")[,2:9]
#import normalized counts for kmt5b data
kmt5b_genecounts <- read.csv("kmt5b-homvswt3x3_normalized_reads_gene_list.csv")[,2:10]
#import deseq output for hdlbpa
hdlbpa <- read.csv("hdlbpa-homvswt_allresults_wt_hom-with-normalized.csv")[,2:9]
#import normalized counts for hdlbpa
hdlbpa_genecounts <- read.csv("hdlbpa-homvswt_normalized_reads_gene_list.csv")[,2:12]
#add columns to old and new data about whether data is old or new
kmt5b$mutation <- "kmt5b"
hdlbpa$mutation <- "hdlbpa"
#combine two mutations
kmt5b_hdlbpa <- rbind(kmt5b, hdlbpa)
#add some information
#limit padj to 1e-10
kmt5b_hdlbpa <- kmt5b_hdlbpa %>% mutate(padjbound = ifelse(kmt5b_hdlbpa$padj < 1e-10, 1e-10, kmt5b_hdlbpa$padj))
#limit pvalue to 1e-10
kmt5b_hdlbpa <- kmt5b_hdlbpa %>% mutate(pvaluebound = ifelse(kmt5b_hdlbpa$pvalue < 1e-10, 1e-10, kmt5b_hdlbpa$pvalue))
#import annotations for chromosome and TSS
chromosomeinfo <- readr::read_tsv(file="ncbi_refseqgenes")
#keep chromosome and gene name
chromosomeinfo <- chromosomeinfo %>% dplyr::select(chrom, name2, txStart)
kmt5b_hdlbpa$chrom <- NA
kmt5b_hdlbpa$txStart <- NA
#add chromosome and txStart information
for (x in 1:length(kmt5b_hdlbpa$LLgeneAbbrev)) {
gene <- kmt5b_hdlbpa$LLgeneAbbrev[x]
chromosomeinfosub <- chromosomeinfo %>% dplyr::filter(name2 == gene)
chr <- chromosomeinfosub$chrom[1]
txStart <- chromosomeinfosub$txStart[1]
kmt5b_hdlbpa$chrom[x] <- chr
kmt5b_hdlbpa$txStart[x] <- txStart
}
#add chromosome number for RASGRF1
kmt5b_hdlbpa <- kmt5b_hdlbpa %>% mutate(chrom = ifelse(LLgeneAbbrev == "RASGRF1", "chr18", chrom))
#add a column with chromosome number
kmt5b_hdlbpa <- kmt5b_hdlbpa %>% mutate(chromosomenumber = str_replace_all(chrom, c("chr1"="1", "chr2"="2", "chr3"="3", "chr4"="4", "chr5"="5", "chr6"="6", "chr7"="7", "chr8"="8", "chr9"="9", "chr10"="10", "chr11"="11", "chr12"="12", "chr13"="13", "chr14"="14", "chr15"="15", "chr16"="16", "chr17"="17", "chr18"="18", "chr19"="19", "chr20"="20", "chr21"="21", "chr22"="22", "chr23"="23", "chr24"="24", "chr25"="25")))
kmt5b_hdlbpa <- distinct(kmt5b_hdlbpa)
#export csv with DEG for both 2dpf and 6dpf
write.csv(kmt5b_hdlbpa,file = "kmt5b_hdlbpa_combined.csv")Are differentially expressed genes located at a particular place in the genome?
This function is from: https://genome.sph.umich.edu/wiki/Code_Sample:_Generating_Manhattan_Plots_in_R
#import comparisons (to skip running previous steps)
kmt5b_hdlbpa <- read.csv("kmt5b_hdlbpa_combined.csv")[,2:15]
#function for making manhattan plots
manhattan.plot<-function(chr, pos, pvalue,
sig.level=NA, annotate=NULL, ann.default=list(),
should.thin=T, thin.pos.places=2, thin.logp.places=2,
xlab="Chromosome", ylab=expression(-log[10](p-value)),
col=c("gray","darkgray"), panel.extra=NULL, pch=20, cex=0.8,...) {
if (length(chr)==0) stop("chromosome vector is empty")
if (length(pos)==0) stop("position vector is empty")
if (length(pvalue)==0) stop("pvalue vector is empty")
#make sure we have an ordered factor
if(!is.ordered(chr)) {
chr <- ordered(chr)
} else {
chr <- chr[,drop=T]
}
#make sure positions are in kbp
if (any(pos>1e6)) pos<-pos/1e6;
#calculate absolute genomic position
#from relative chromosomal positions
posmin <- tapply(pos,chr, min);
posmax <- tapply(pos,chr, max);
posshift <- head(c(0,cumsum(posmax)),-1);
names(posshift) <- levels(chr)
genpos <- pos + posshift[chr];
getGenPos<-function(cchr, cpos) {
p<-posshift[as.character(cchr)]+cpos
return(p)
}
#parse annotations
grp <- NULL
ann.settings <- list()
label.default<-list(x="peak",y="peak",adj=NULL, pos=3, offset=0.5,
col=NULL, fontface=NULL, fontsize=NULL, show=F)
parse.label<-function(rawval, groupname) {
r<-list(text=groupname)
if(is.logical(rawval)) {
if(!rawval) {r$show <- F}
} else if (is.character(rawval) || is.expression(rawval)) {
if(nchar(rawval)>=1) {
r$text <- rawval
}
} else if (is.list(rawval)) {
r <- modifyList(r, rawval)
}
return(r)
}
if(!is.null(annotate)) {
if (is.list(annotate)) {
grp <- annotate[[1]]
} else {
grp <- annotate
}
if (!is.factor(grp)) {
grp <- factor(grp)
}
} else {
grp <- factor(rep(1, times=length(pvalue)))
}
ann.settings<-vector("list", length(levels(grp)))
ann.settings[[1]]<-list(pch=pch, col=col, cex=cex, fill=col, label=label.default)
if (length(ann.settings)>1) {
lcols<-trellis.par.get("superpose.symbol")$col
lfills<-trellis.par.get("superpose.symbol")$fill
for(i in 2:length(levels(grp))) {
ann.settings[[i]]<-list(pch=pch,
col=lcols[(i-2) %% length(lcols) +1 ],
fill=lfills[(i-2) %% length(lfills) +1 ],
cex=cex, label=label.default);
ann.settings[[i]]$label$show <- T
}
names(ann.settings)<-levels(grp)
}
for(i in 1:length(ann.settings)) {
if (i>1) {ann.settings[[i]] <- modifyList(ann.settings[[i]], ann.default)}
ann.settings[[i]]$label <- modifyList(ann.settings[[i]]$label,
parse.label(ann.settings[[i]]$label, levels(grp)[i]))
}
if(is.list(annotate) && length(annotate)>1) {
user.cols <- 2:length(annotate)
ann.cols <- c()
if(!is.null(names(annotate[-1])) && all(names(annotate[-1])!="")) {
ann.cols<-match(names(annotate)[-1], names(ann.settings))
} else {
ann.cols<-user.cols-1
}
for(i in seq_along(user.cols)) {
if(!is.null(annotate[[user.cols[i]]]$label)) {
annotate[[user.cols[i]]]$label<-parse.label(annotate[[user.cols[i]]]$label,
levels(grp)[ann.cols[i]])
}
ann.settings[[ann.cols[i]]]<-modifyList(ann.settings[[ann.cols[i]]],
annotate[[user.cols[i]]])
}
}
rm(annotate)
#reduce number of points plotted
if(should.thin) {
thinned <- unique(data.frame(
logp=round(-log10(pvalue),thin.logp.places),
pos=round(genpos,thin.pos.places),
chr=chr,
grp=grp)
)
logp <- thinned$logp
genpos <- thinned$pos
chr <- thinned$chr
grp <- thinned$grp
rm(thinned)
} else {
logp <- -log10(pvalue)
}
rm(pos, pvalue)
gc()
#custom axis to print chromosome names
axis.chr <- function(side,...) {
if(side=="bottom") {
panel.axis(side=side, outside=T,
at=((posmax+posmin)/2+posshift),
labels=levels(chr),
ticks=F, rot=0,
check.overlap=F
)
} else if (side=="top" || side=="right") {
panel.axis(side=side, draw.labels=F, ticks=F);
}
else {
axis.default(side=side,...);
}
}
#make sure the y-lim covers the range (plus a bit more to look nice)
prepanel.chr<-function(x,y,...) {
A<-list();
maxy<-ceiling(max(y, ifelse(!is.na(sig.level), -log10(sig.level), 0)))+.5;
A$ylim=c(0,maxy);
A;
}
xyplot(logp~genpos, chr=chr, groups=grp,
axis=axis.chr, ann.settings=ann.settings,
prepanel=prepanel.chr, scales=list(axs="i"),
panel=function(x, y, ..., getgenpos) {
if(!is.na(sig.level)) {
#add significance line (if requested)
panel.abline(h=-log10(sig.level), lty=2);
}
panel.superpose(x, y, ..., getgenpos=getgenpos);
if(!is.null(panel.extra)) {
panel.extra(x,y, getgenpos, ...)
}
},
panel.groups = function(x,y,..., subscripts, group.number) {
A<-list(...)
#allow for different annotation settings
gs <- ann.settings[[group.number]]
A$col.symbol <- gs$col[(as.numeric(chr[subscripts])-1) %% length(gs$col) + 1]
A$cex <- gs$cex[(as.numeric(chr[subscripts])-1) %% length(gs$cex) + 1]
A$pch <- gs$pch[(as.numeric(chr[subscripts])-1) %% length(gs$pch) + 1]
A$fill <- gs$fill[(as.numeric(chr[subscripts])-1) %% length(gs$fill) + 1]
A$x <- x
A$y <- y
do.call("panel.xyplot", A)
#draw labels (if requested)
if(gs$label$show) {
gt<-gs$label
names(gt)[which(names(gt)=="text")]<-"labels"
gt$show<-NULL
if(is.character(gt$x) | is.character(gt$y)) {
peak = which.max(y)
center = mean(range(x))
if (is.character(gt$x)) {
if(gt$x=="peak") {gt$x<-x[peak]}
if(gt$x=="center") {gt$x<-center}
}
if (is.character(gt$y)) {
if(gt$y=="peak") {gt$y<-y[peak]}
}
}
if(is.list(gt$x)) {
gt$x<-A$getgenpos(gt$x[[1]],gt$x[[2]])
}
do.call("panel.text", gt)
}
},
xlab=xlab, ylab=ylab,
panel.extra=panel.extra, getgenpos=getGenPos, ...
);
}
#select columns to include in manhattan plot
myTopHits.df <- kmt5b_hdlbpa %>% dplyr::select(chromosomenumber, txStart, pvaluebound, mutation)
#filter to only include chromosomes 1-25
myTopHits.df <- myTopHits.df %>% dplyr::filter(chromosomenumber %in% 1:25)
#make chromosomes plotted in order
myTopHits.df$chromosomenumber <- factor(myTopHits.df$chromosomenumber, levels = 1:25)
#omit NA values
myTopHits.df <- na.omit(myTopHits.df)
#make colors
manhattancol <- c(replicate(8, c("lightgrey", "darkgrey")), "lightgrey", "#21908CFF", replicate(4, c("lightgrey", "darkgrey")))
#filter comparisons to only include old data
manhattan <- myTopHits.df %>% dplyr::filter(mutation == "kmt5b")
#make manhattan plot
manhattan.plot(manhattan$chromosomenumber, manhattan$txStart, manhattan$pvaluebound, should.thin=F, col=manhattancol)#filter comparisons to only include old data
manhattan <- myTopHits.df %>% dplyr::filter(mutation == "hdlbpa")
#make colors
manhattancol <- c(replicate(2, c("lightgrey", "darkgrey")), "lightgrey", "#21908CFF", replicate(9, c("lightgrey", "darkgrey")))
#make manhattan plot
manhattan.plot(manhattan$chromosomenumber, manhattan$txStart, manhattan$pvaluebound, should.thin=F, col=manhattancol)#import annotations for chromosome and gene name
chromosomeinfo <- readr::read_tsv(file="ncbi_refseqgenes")
#keep chromosome and gene name
chromosomeinfo <- chromosomeinfo %>% dplyr::select(chrom, name2)
#change column names
colnames(chromosomeinfo) <- c("chromosome", "genename")
#select only chromosomes 1-25
chromosomeinfo <- chromosomeinfo %>% dplyr::filter(chromosome %in% paste("chr", 1:25, sep=""))
#keep only distinct rows
chromosomeinfo <- distinct(chromosomeinfo)# Perform GSEA using clusterProfiler
#filter to only one comparison
GSEAgenes <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "kmt5b")
#keep only the columns we need for GSEA
mydata.df.sub <- dplyr::select(GSEAgenes, LLgeneAbbrev, log2FoldChange)
#sort by abs(foldchange)
mydata.df.sub$log2FoldChange <- abs(mydata.df.sub$log2FoldChange)
# construct a named vector
mydata.gsea <- mydata.df.sub$log2FoldChange
names(mydata.gsea) <- as.character(mydata.df.sub$LLgeneAbbrev)
mydata.gsea <- sort(mydata.gsea, decreasing = TRUE)
# run GSEA using the 'GSEA' function from clusterProfiler
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=chromosomeinfo, verbose=FALSE, scoreType="pos", maxGSSize=2000, pvalueCutoff = 1, seed=TRUE)
myGSEA.df <- as_tibble(myGSEA.res@result)
# view results as an interactive table
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)# create enrichment plots using the enrichplot package
gseaplot2(myGSEA.res,
geneSetID = c(1), #can choose multiple signatures to overlay in this plot
pvalue_table = FALSE, #can set this to FALSE for a cleaner plot
title = myGSEA.res$Description[1]) #can also turn off this titleChromosome 18 genes are enriched in the kmt5b mutants.
# Perform GSEA using clusterProfiler
#filter to only one comparison
GSEAgenes <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "hdlbpa")
#keep only the columns we need for GSEA
mydata.df.sub <- dplyr::select(GSEAgenes, LLgeneAbbrev, log2FoldChange)
#sort by abs(foldchange)
mydata.df.sub$log2FoldChange <- abs(mydata.df.sub$log2FoldChange)
# construct a named vector
mydata.gsea <- mydata.df.sub$log2FoldChange
names(mydata.gsea) <- as.character(mydata.df.sub$LLgeneAbbrev)
mydata.gsea <- sort(mydata.gsea, decreasing = TRUE)
# run GSEA using the 'GSEA' function from clusterProfiler
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=chromosomeinfo, verbose=FALSE, scoreType="pos", maxGSSize=2000, pvalueCutoff = 1, seed=TRUE)
myGSEA.df <- as_tibble(myGSEA.res@result)
# view results as an interactive table
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)# create enrichment plots using the enrichplot package
gseaplot2(myGSEA.res,
geneSetID = c(1), #can choose multiple signatures to overlay in this plot
pvalue_table = FALSE, #can set this to FALSE for a cleaner plot
title = myGSEA.res$Description[1]) #can also turn off this titleChromosome 6 genes are enriched in the hdlbpa mutant.
#import normalized counts for kmt5b data
kmt5b_genecounts <- read.csv("kmt5b-homvswt3x3_normalized_reads_gene_list.csv")[,2:10]
#pick out a genes to plot
geneofinterest <- c(sharedgenes, "grb10b", "aurkb", "c1ql3a")
#make new columns with mean counts of wt
genecountslogfc <- kmt5b_genecounts
genecountslogfc$kmt5bavg <- rowMeans(genecountslogfc[,7:9])
#divide columns by mean counts of wt
genecountslogfc <- genecountslogfc %>% mutate(across(colnames(genecountslogfc)[4:9], function(x) x/kmt5bavg))
#pull out data for a select gene
generepdata <- genecountslogfc %>% dplyr::filter(LLgeneAbbrev %in% geneofinterest)
#get rid of unnecessary columns
generepdata <- generepdata[,c(1,4:9)]
#pivot longer to make tidy
generepdata <- generepdata %>% pivot_longer(!LLgeneAbbrev, names_to = "sample", values_to = "foldchange")
#add condition information based on sampleName
generepdata$genotype <- generepdata$sample
#substitute condition name based on replicate
generepdata <- generepdata %>% mutate(genotype = str_replace_all(genotype, c("^wt.*"="wt", "^hom.*"="hom")))
#order data on plot
generepdata$genotype <- factor(generepdata$genotype, levels=c("wt", "hom"))
#set colors
colors <- c("#472D7BFF", "#21908CFF")
#make an empty list
plot_list = list()
#make all the plots
for (z in 1:length(geneofinterest)) {
genedata2dpf <- generepdata %>% dplyr::filter(LLgeneAbbrev == geneofinterest[z])
plot <- ggplot(genedata2dpf, aes(fill=genotype, y=foldchange, x=genotype)) +
geom_bar(position="dodge", stat="summary") +
geom_point(position = position_dodge(width = .9)) +
labs(title = paste(geneofinterest[z])) +
ylab("fold change") +
theme_bw() +
scale_fill_manual(values = colors)
plot_list[[z]] <- plot
}
# Save plots to svg Makes a separate file for each plot.
for (i in 1:length(geneofinterest)) {
file_name = paste("kmt5b_foldchange_", geneofinterest[i], ".svg", sep="")
svglite(file_name)
print(plot_list[[i]])
dev.off()
} #subset data to kmt5b
myTopHits.df <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "kmt5b")
#edit padj bound to e-20
myTopHits.df <- myTopHits.df %>% dplyr::mutate(padjbound = ifelse(padj < 1e-20, 1e-20, padj))
#add column about whether chr is 18
myTopHits.df <- myTopHits.df %>% dplyr::mutate(chrom = replace_na(chrom, "none"))
myTopHits.df <- myTopHits.df %>% mutate(chromosome = ifelse(chrom == "chr18", "chromosome 18", "other chromosome"))
#list of insulin genes
myTopHits.insulin <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("grb10b", "irs4a", "socs2", "igf2b", "pik3r3b", "raf1b", "rapgef1a", "rhebl1", "pik3cb", "ppp1cbl", "rps6"))
myTopHits.insulin$category <- "insulin"
myTopHits.glycolysis <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("adh5", "eno1a", "eno3", "pgk1", "tpila"))
myTopHits.glycolysis$category <- "glycolysis"
myTopHits.cellcycle <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("setdb2", "thap1", "anapc2", "cdc37"))
myTopHits.cellcycle$category <- "cell cycle"
myTopHits.synapse <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("adora1b", "chata", "gpc2", "pmchl", "syn2b", "syt1a", "tmem163a"))
myTopHits.synapse$category <- "synapse"
myTopHits.translation <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("eif3ea", "eif3f", "eif3g", "eif3i", "faua", "rpl10", "rpl13", "rpl18a", "rpl19", "rpl24", "rpl26", "rpl27a", "rpl3", "rpl37", "rpl6", "rpl7", "rpl7a", "rpl8", "rps15", "rps19", "rps2", "rps26", "rps38", "rps3a", "rps4x", "rps5", "rps9", "rpsa", "rplp2", "rplp2l"))
myTopHits.translation$category <- "translation"
#genes to label
myTopHits.labels <- rbind(myTopHits.insulin, myTopHits.glycolysis, myTopHits.cellcycle, myTopHits.synapse, myTopHits.translation)
#change order that points are plotted
myTopHits.labels <- myTopHits.labels %>% arrange(match(category, c("translation", "glycolysis", "insulin", "cell cycle", "synapse")), desc(category))
genestolabel <- c("si:dkey-242h9.3", "cd82b", "bcar1", "irs4a", "mbd4", "carmil2", "nars", "cngb1a", "RASGRF1", "mlc1", "grb10b", "slc6a6b", "XLOC_024221", "lmnb2", "opn1lw1", "b3gnt2l", "XLOC_013061", "grb10b", "irs4a", "socs2", "igf2b", "pik3r3b", "raf1b", "rapgef1a", "rhebl1", "pik3cb", "ppp1cbl", "rps6", "adh5", "eno1a", "eno3", "pgk1", "tpila", "setdb2", "thap1", "anapc2", "cdc37", "adora1b", "chata", "gpc2", "pmchl", "syn2b", "syt1a", "tmem163a", "eif3ea", "eif3f", "eif3g", "eif3i")
genestolabel <- c("si:dkey-242h9.3", "cd82b", "bcar1", "irs4a", "mbd4", "carmil2", "nars", "cngb1a", "RASGRF1", "mlc1", "grb10b", "slc6a6b", "XLOC_024221", "lmnb2", "opn1lw1", "b3gnt2l", "XLOC_013061")
#subset to only include labeled genes
myTopHits.labels.all <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% genestolabel)
#make all points other
myTopHits.df <- myTopHits.df %>% dplyr::mutate(mutation = "other")
#make the plot
kmt5b_volcano <- ggplot() +
annotate("rect", xmin = 1, xmax = 5.5, ymin = -log10(0.05), ymax = 20,
alpha=.15, fill="grey") +
annotate("rect", xmin = -1, xmax = -5.5, ymin = -log10(0.05), ymax = 20,
alpha=.15, fill="grey") +
geom_point(data=myTopHits.df, aes(y=-log10(padjbound), x=log2FoldChange, shape = chromosome, color = mutation), size=2) +
geom_point(data=myTopHits.labels, aes(y=-log10(padjbound), x = log2FoldChange, color=category, shape=chromosome), size=2, show.legend = T) +
theme_bw() +
coord_cartesian(xlim = c(-6, 6), ylim = c(-0.5, 20.5), expand = FALSE) +
ylab("-log10(padj)") +
xlab("log2 fold change") +
geom_label_repel(data=myTopHits.labels.all, aes(x=log2FoldChange, y=-log10(padjbound), label=LLgeneAbbrev), force = 2, nudge_y = -1, size = 2.5, max.overlaps = Inf, show.legend = FALSE, color = "black") + #label selected genes
theme_bw() +
scale_color_manual(values = c("#AADC32FF", "#27AD81FF", "#472D7BFF", "darkgrey", "gold", "deepskyblue", "orange", "#D697FF"), name = "pathway") +
scale_shape_manual(values=c(4, 16))
kmt5b_volcanoGet a list of the top 300 genes dysregulated in kmt5b by padj to put into DAVID.
#select kmt5b genes that aren't on chromosome 18
kmt5b_genesfordavid <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "kmt5b" & chromosomenumber != 18)
#sort by pvalue
kmt5b_genesfordavid <- kmt5b_genesfordavid %>% dplyr::arrange(padj)
#get names of top 300 genes
kmt5b_genesfordavid <- kmt5b_genesfordavid$LLgeneAbbrev[1:300]
#write.csv(kmt5b_genesfordavid, "kmt5b_output/kmt5b_genesfordavid.csv")
# use the 'gost' function from the gprofiler2 package to run GO enrichment analysis
#genes shared between comparisons - both up and downregulated
gost.res.homwtall <- gost(kmt5b_genesfordavid, organism = "drerio", correction_method = "fdr", significant = TRUE, evcodes = TRUE, ordered_query = TRUE)
#export to csv
write.csv(apply(gost.res.homwtall$result,2,as.character), file="kmt5b_output/kmt5b_GO-analysis.csv")
# produce a manhattan plot of enriched GO terms
gostplot(gost.res.homwtall, interactive = T, capped = T) #we want the zebrafish signatures
dr_gsea <- msigdbr(species = "Danio rerio") #gets all collections/signatures with zebrafish
#look at the categories and subcategories of signatures available
dr_gsea %>%
dplyr::distinct(gs_cat, gs_subcat) %>%
dplyr::arrange(gs_cat, gs_subcat)# choose a specific msigdb collection/subcollection
dr_gsea_c5 <- msigdbr(species = "Danio rerio", category = "C5") %>% # choose msigdb collection of interest
dplyr::select(gs_name, gene_symbol) #just get the columns corresponding to signature name and gene symbols of genes in each signature
# pull out data for kmt5b
GSEAgenes <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "kmt5b" & chromosomenumber != 18)
mydata.df.sub <- dplyr::select(GSEAgenes, LLgeneAbbrev, log2FoldChange)
#get rid of duplicates
mydata.df.sub <- mydata.df.sub %>% dplyr::filter(LLgeneAbbrev %in% mydata.df.sub$LLgeneAbbrev[!duplicated(mydata.df.sub$LLgeneAbbrev)])
# construct a named vector
mydata.gsea <- mydata.df.sub$log2FoldChange
names(mydata.gsea) <- as.character(mydata.df.sub$LLgeneAbbrev)
mydata.gsea <- sort(mydata.gsea, decreasing = TRUE)
# run GSEA with C5 in kmt5b
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=dr_gsea_c5, verbose=FALSE, seed=TRUE)
#convert to DF
myGSEA.df <- as_tibble(myGSEA.res)
#look at top terms
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)#import data
myGSEA.df <- read.csv("kmt5b_output/kmt5b-GSEA-c5.csv")[,2:12]
#list of terms to include in network
networkterms <- c("GOCC_CYTOSOLIC_RIBOSOME", "GOCC_RIBOSOMAL_SUBUNIT", "GOBP_CYTOPLASMIC_TRANSLATION", "GOMF_STRUCTURAL_CONSTITUENT_OF_RIBOSOME", "GOCC_RIBOSOME", "GOCC_CYTOSOLIC_LARGE_RIBOSOMAL_SUBUNIT", "GOCC_LARGE_RIBOSOMAL_SUBUNIT", "GOCC_CYTOSOLIC_SMALL_RIBOSOMAL_SUBUNIT", "GOCC_SMALL_RIBOSOMAL_SUBUNIT", "GOCC_POLYSOME", "GOCC_MITOCHONDRIAL_PROTEIN_CONTAINING_COMPLEX", "GOBP_DNA_REPLICATION", "GOBP_DNA_REPAIR", "GOCC_MITOCHONDRIAL_MATRIX", "HP_ABNORMAL_MYELINATION", "GOBP_DNA_DEPENDENT_DNA_REPLICATION", "GOBP_AEROBIC_RESPIRATION", "HP_ABNORMAL_CNS_MYELINATION", "HP_CEREBELLAR_HYPOPLASIA", "GOBP_DOUBLE_STRAND_BREAK_REPAIR", "HP_APLASIA_HYPOPLASIA_OF_THE_CEREBELLUM", "HP_HYPOPLASIA_OF_THE_CORPUS_CALLOSUM", "GOCC_INNER_MITOCHONDRIAL_MEMBRANE_PROTEIN_COMPLEX", "HP_ABNORMALITY_OF_THE_MITOCHONDRION", "GOBP_ATP_SYNTHESIS_COUPLED_ELECTRON_TRANSPORT", "GOBP_OXIDATIVE_PHOSPHORYLATION", "GOMF_ORGANIC_ANION_TRANSMEMBRANE_TRANSPORTER_ACTIVITY", "GOMF_SECONDARY_ACTIVE_TRANSMEMBRANE_TRANSPORTER_ACTIVITY", "GOBP_ANION_TRANSPORT", "GOBP_SODIUM_ION_TRANSMEMBRANE_TRANSPORT", "GOBP_IMPORT_ACROSS_PLASMA_MEMBRANE", "GOBP_MITOTIC_CELL_CYCLE_PHASE_TRANSITION", "GOBP_REGULATION_OF_CELL_CYCLE_PHASE_TRANSITION", "GOBP_CELL_CYCLE_PHASE_TRANSITION")
myGSEA.res.filter <- myGSEA.res
myGSEA.res.filter@result <- myGSEA.res.filter@result %>% dplyr::filter(ID %in% networkterms)
#make network plot
myGSEA.res.filter <- pairwise_termsim(myGSEA.res.filter)
kmt5bnetwork <- emapplot(myGSEA.res.filter, color="NES", categorySize="p.adjust", showCategory=length(networkterms))
#get data of out network plot
kmt5bnetwork <- ggplot_build(kmt5bnetwork)
networkdata <- kmt5bnetwork$plot$data
kmt5b_c5_network <- ggraph(networkdata) +
geom_edge_link(alpha=.8, aes_(width=~I(width)), colour='darkgrey') +
geom_node_point(aes(colour = color, size=size)) +
geom_node_text(aes(label=name), repel=TRUE) +
theme_void() +
scale_color_gradientn(colors = myheatcolors3, limit=c(-3.5,3.5), name="NES")
kmt5b_c5_network#export 1200x800
#make heatmap
p3 <- heatplot(myGSEA.res, foldChange=mydata.gsea, showCategory=749) + ggplot2::coord_flip()
#get data of out heatmap
p3 <- ggplot_build(p3)
heatmapdata <- p3$plot$data
#filter to only include fold changes > 0.5
genestoinclude <- heatmapdata %>% dplyr::filter(foldChange > 0.5 | foldChange < -0.5)
heatmapdata <- heatmapdata %>% dplyr::filter(Gene %in% genestoinclude$Gene)
#choose which clusters to keep in heatmap
heatmapdata <- heatmapdata %>% dplyr::filter(categoryID %in% c("HP_ABNORMAL_MYELINATION", "HP_ABNORMAL_CNS_MYELINATION", "HP_CEREBELLAR_HYPOPLASIA", "HP_APLASIA_HYPOPLASIA_OF_THE_CEREBELLUM", "HP_HYPOPLASIA_OF_THE_CORPUS_CALLOSUM", "GOBP_MITOTIC_CELL_CYCLE_PHASE_TRANSITION", "GOBP_REGULATION_OF_CELL_CYCLE_PHASE_TRANSITION", "GOBP_CELL_CYCLE_PHASE_TRANSITION"))
#use pivot wider to make untidy table
heatmapdata <- heatmapdata %>% pivot_wider(names_from = categoryID, values_from = foldChange, values_fill=NA)
#make matrix
heatmapmatrix <- as.matrix(heatmapdata[,2:9])
rownames(heatmapmatrix) <- heatmapdata$Gene
#turn matrix into 1s and 0s and cluster
heatmapmatrix<-ifelse(abs(heatmapmatrix) > 0,1,0)
heatmapmatrix[is.na(heatmapmatrix)] = 0
#cluster your selected genes
hc <- hclust(as.dist(1-cor(heatmapmatrix, method="spearman")), method="average") #cluster columns by spearman correlation
#turn heatmap data back into tidy table
heatmapdata <- heatmapdata %>% pivot_longer(!Gene, names_to = "categoryID", values_to = "foldChange")
#change order of data to be plotted
heatmapdata$categoryID <- factor(heatmapdata$categoryID, levels = hc$labels)
heatmapdata$Gene <- factor(heatmapdata$Gene, levels = rev(unique(heatmapdata$Gene)))
#rename dataframe columns
colnames(heatmapdata) <- c("Gene", "categoryID", "log2foldchange")
kmt5b_c5_heatmap <- ggplot(heatmapdata, aes(x = Gene, y = categoryID, fill = log2foldchange)) +
geom_tile(color="black") +
theme_classic() +
scale_fill_continuous_divergingx(palette = 'RdBu', mid = 0, na.value="white") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), axis.title.x=element_blank(), axis.title.y=element_blank())
kmt5b_c5_heatmap + ggplot2::coord_flip()CNStermsGSEA <- read.csv("CNStermsGSEA.csv")[,2:3]
headtermsGSEA <- read.csv("headtermsGSEA.csv")[,2:3]
fullsetGSEA <- read.csv("fullsetGSEA.csv")[,2:3]
# Pull out just the columns corresponding to gene symbols and LogFC for at least one pairwise comparison for the enrichment analysis
GSEAgenes <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "kmt5b" & chromosomenumber != 18)
mydata.df.sub <- dplyr::select(GSEAgenes, LLgeneAbbrev, log2FoldChange)
#get rid of duplicates
mydata.df.sub <- mydata.df.sub %>% dplyr::filter(LLgeneAbbrev %in% mydata.df.sub$LLgeneAbbrev[!duplicated(mydata.df.sub$LLgeneAbbrev)])
# construct a named vector
mydata.gsea <- mydata.df.sub$log2FoldChange
names(mydata.gsea) <- as.character(mydata.df.sub$LLgeneAbbrev)
mydata.gsea <- sort(mydata.gsea, decreasing = TRUE)
#options for doing GSEA using ZFA
#CNStermsGSEA
#headtermsGSEA
#fullsetGSEA
# run GSEA with CNS terms
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=CNStermsGSEA, verbose=FALSE, seed=TRUE, minGSSize = 80, pvalueCutoff = 1)
myGSEA.df <- as_tibble(myGSEA.res@result)
# view results as an interactive table
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)The only term that is significant for head terms is lens. No terms are significant for CNS terms.
#import single cell markers
singlecellmarkers <- read.csv("zf5dpf_markersforGSEA.csv")
#select relevant columns
singlecellmarkers <- singlecellmarkers %>% dplyr::select(cluster.description, gene)
# Pull out just the columns corresponding to gene symbols and LogFC
GSEAgenes <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "kmt5b" & chromosomenumber != 18)
mydata.df.sub <- dplyr::select(GSEAgenes, LLgeneAbbrev, log2FoldChange)
#get rid of duplicates
mydata.df.sub <- mydata.df.sub %>% dplyr::filter(LLgeneAbbrev %in% mydata.df.sub$LLgeneAbbrev[!duplicated(mydata.df.sub$LLgeneAbbrev)])
# construct a named vector
mydata.gsea <- mydata.df.sub$log2FoldChange
names(mydata.gsea) <- as.character(mydata.df.sub$LLgeneAbbrev)
mydata.gsea <- sort(mydata.gsea, decreasing = TRUE)
# run GSEA with single cell markers in 6dpf 23bp
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=singlecellmarkers, verbose=FALSE, seed=TRUE, minGSSize = 80)
myGSEA.df <- as_tibble(myGSEA.res@result)
#export
#write.csv(myGSEA.df, file="kmt5b_output/kmt5b-singlecellGSEA.csv")
#import
myGSEA.df <- read.csv(file="kmt5b_output/kmt5b-singlecellGSEA.csv")[,2:12]
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)#make network plot
myGSEA.res <- pairwise_termsim(myGSEA.res)
kmt5bnetwork <- emapplot(myGSEA.res, color="NES", categorySize="p.adjust", showCategory = 32)
#get data of out network plot
kmt5bnetwork <- ggplot_build(kmt5bnetwork)
networkdata <- kmt5bnetwork$plot$data
#make network plot
kmt5b_singlecell_network <- ggraph(networkdata) +
geom_edge_link(alpha=.8, aes_(width=~I(width)), colour='darkgrey') +
geom_node_point(aes(colour = color, size=size)) +
geom_node_text(aes(label=name), repel=TRUE) +
theme_void() +
scale_color_gradientn(colors = myheatcolors3, limit=c(-3.5,3.5), name="NES")
kmt5b_singlecell_network#export 700x500
#make heatmap
p3 <- heatplot(myGSEA.res, foldChange=mydata.gsea, showCategory = 33) + ggplot2::coord_flip()
#get data of out heatmap
p3 <- ggplot_build(p3)
heatmapdata <- p3$plot$data
#filter to only include fold changes > 0.5
genestoinclude <- heatmapdata %>% dplyr::filter(foldChange > 0.5 | foldChange < -0.5)
heatmapdata <- heatmapdata %>% dplyr::filter(Gene %in% genestoinclude$Gene)
#filter to only include clusters to go in heatmap
heatmapdata <- heatmapdata %>% dplyr::filter(categoryID %in% c("diencephalon (tuberculum; gabaergic)", "granule cells", "hindbrain", "hypothalamus #3", "mid-hind boundary (gabaergic) #1", "mid-hind boundary (gabaergic) #2", "neurons #4", "neurons (ganglion)", "neurons (glutamatergic) #1", "neurons (glutamatergic) #2", "neurons (midbrain?, glutamatergic)", "olfactory bulb", "oligodendrocytes #1", "optic tectum (gabaergic) #1", "progenitors #1", "progenitors #2", "progenitors #3", "progenitors (cycling)", "progenitors/neurons (differentiating)", "ventral forebrain (dienc, hyp, poa; gabaergic) #1"))
#use pivot wider to make untidy table
heatmapdata <- heatmapdata %>% pivot_wider(names_from = categoryID, values_from = foldChange, values_fill=NA)
#make matrix
heatmapmatrix <- as.matrix(heatmapdata[,2:21])
rownames(heatmapmatrix) <- heatmapdata$Gene
#turn matrix into 1s and 0s and cluster
heatmapmatrix<-ifelse(abs(heatmapmatrix) > 0,1,0)
heatmapmatrix[is.na(heatmapmatrix)] = 0
#cluster rows by pearson correlation
hc <- hclust(as.dist(1-cor(heatmapmatrix, method="spearman")), method="average") #cluster columns by spearman correlation
#turn heatmap data back into tidy table
heatmapdata <- heatmapdata %>% pivot_longer(!Gene, names_to = "categoryID", values_to = "foldChange")
#change order of data to be plotted
heatmapdata$categoryID <- factor(heatmapdata$categoryID, levels = hc$labels)
heatmapdata$Gene <- factor(heatmapdata$Gene, levels = rev(unique(heatmapdata$Gene)))
#rename dataframe columns
colnames(heatmapdata) <- c("Gene", "categoryID", "log2foldchange")
singlecellheatmap <- ggplot(heatmapdata, aes(x = Gene, y = categoryID, fill = log2foldchange)) +
geom_tile(color="black") +
theme_classic() +
scale_fill_continuous_divergingx(palette = 'RdBu', mid = 0, na.value="white") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), axis.title.x=element_blank(), axis.title.y=element_blank())
singlecellheatmap + ggplot2::coord_flip()#export 800x2500
#repeat 6dpf GSEA but with neuronal clusters only
nonneuronalclusters <- c("#N/A", "cartilage #2", "cartilage #1", "cartilage #3", "cornea", "cranial ganglion", "cranial ganglion (vagal) #1", "cranial ganglion (vagal) #2", "dermal bone", "epidermis (progenitors)", "epithelium (cornea, progenitors)", "erythrocytes", "eye (cornea), cartilage", "eye (cornea, epithelium)", "eye (non retina)", "glia (eye)", "lens #1", "lens #2", "muscle", "neural crest derived (pigment/xanthophore)", "neutrophils", "otic vesicle", "otic/lateral line", "pigment cell (iridophore)", "retina (amacrine, gabaergic)", "retina (cone bipolar cells)", "retina (cones)", "retina (muller glia)", "retina (photoreceptor precursor cells)", "retina (RGC)", "retina (rods)", "retina (RPE differentiation)", "retina (RPE)", "retina neurons (horizontal?)", "rostral blood island (myeloid) #1", "rostral blood island (myeloid) #2", "pharangeal arch/pectoral fin (mesoderm)")
#make GSEA set for CNS single cell data
singlecellmarkers_CNS <- singlecellmarkers %>% dplyr::filter(! cluster.description %in% nonneuronalclusters)
# run GSEA with single cell markers in 6dpf 23bp
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=singlecellmarkers_CNS, verbose=FALSE, seed=TRUE, minGSSize = 80)
myGSEA.df <- as_tibble(myGSEA.res@result)
#export
#write.csv(myGSEA.df, file="kmt5b_output/kmt5b-singlecellGSEA-CNS.csv")
#import
myGSEA.df <- read.csv(file="kmt5b_output/kmt5b-singlecellGSEA-CNS.csv")[,2:12]
#view data table
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)#make network plot
myGSEA.res <- pairwise_termsim(myGSEA.res)
kmt5bnetwork <- emapplot(myGSEA.res, color="NES", categorySize="p.adjust", showCategory = 33)
#get data of out network plot
kmt5bnetwork <- ggplot_build(kmt5bnetwork)
networkdata <- kmt5bnetwork$plot$data
#make new network plot
kmt5b_singlecellCNS_network <- ggraph(networkdata) +
geom_edge_link(alpha=.8, aes_(width=~I(width)), colour='darkgrey') +
geom_node_point(aes(colour = color, size=size)) +
geom_node_text(aes(label=name), repel=TRUE) +
theme_void() +
scale_color_gradientn(colors = myheatcolors3, limit=c(-3.5,3.5), name="NES")
kmt5b_singlecellCNS_network#pick out a genes to plot
geneofinterest <- c(sharedgenes, "mki67")
#import normalized counts for hdlbpa
hdlbpa_genecounts <- read.csv("hdlbpa-homvswt_normalized_reads_gene_list.csv")[,2:12]
#make new columns with mean counts of wt
genecountslogfc <- hdlbpa_genecounts
genecountslogfc$hdlbpaavg <- rowMeans(genecountslogfc[,8:11])
#divide columns by mean counts of wt
genecountslogfc <- genecountslogfc %>% mutate(across(colnames(genecountslogfc)[4:11], function(x) x/hdlbpaavg))
#pull out data for a select gene
generepdata <- genecountslogfc %>% dplyr::filter(LLgeneAbbrev %in% geneofinterest)
#get rid of unnecessary columns
generepdata <- generepdata[,c(1,4:11)]
#pivot longer to make tidy
generepdata <- generepdata %>% pivot_longer(!LLgeneAbbrev, names_to = "sample", values_to = "foldchange")
#add condition information based on sampleName
generepdata$genotype <- generepdata$sample
#substitute condition name based on replicate
generepdata <- generepdata %>% mutate(genotype = str_replace_all(genotype, c("^wt.*"="wt", "^hom.*"="hom")))
#order data on plot
generepdata$genotype <- factor(generepdata$genotype, levels=c("wt", "hom"))
#set colors
colors <- c("#472D7BFF", "#21908CFF")
#make an empty list
plot_list = list()
#make all the plots
for (z in 1:length(geneofinterest)) {
genedata2dpf <- generepdata %>% dplyr::filter(LLgeneAbbrev == geneofinterest[z])
plot <- ggplot(genedata2dpf, aes(fill=genotype, y=foldchange, x=genotype)) +
geom_bar(position="dodge", stat="summary") +
geom_point(position = position_dodge(width = .9)) +
labs(title = paste(geneofinterest[z])) +
ylab("fold change") +
theme_bw() +
scale_fill_manual(values = colors)
plot_list[[z]] <- plot
}
# Save plots to svg Makes a separate file for each plot.
for (i in 1:length(geneofinterest)) {
file_name = paste("hdlbpa_foldchange_", geneofinterest[i], ".svg", sep="")
svglite(file_name)
print(plot_list[[i]])
dev.off()
} #subset data to hdlbpa
myTopHits.df <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "hdlbpa")
#add column about whether chr is 6
myTopHits.df <- myTopHits.df %>% dplyr::mutate(chrom = replace_na(chrom, "none"))
myTopHits.df <- myTopHits.df %>% mutate(chromosome = ifelse(chrom == "chr6", "chromosome 6", "other chromosome"))
myTopHits.cellcycle <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("thap1", "cdc40", "esco2", "mki67", "nusap1", "rbbp8"))
myTopHits.cellcycle$category <- "cell cycle"
#list of insulin genes
myTopHits.insulin <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("pik3r3b", "pik3r6a", "irs4a", "insl5a"))
myTopHits.insulin$category <- "insulin"
myTopHits.collagen <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("col4a3", "col9a3", "col11a2", "fbn2b", "matn1", "prelp", "si:dkey-239b22.2"))
myTopHits.collagen$category <- "collagen/ECM"
myTopHits.glycerophospholipid <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% c("gpd1b", "mboat1", "ppap2d", "pla2g15", "plpp1a", "proca1"))
myTopHits.glycerophospholipid$category <- "glycerophospholipid metabolism"
#genes to label
myTopHits.labels <- rbind(myTopHits.insulin, myTopHits.collagen, myTopHits.cellcycle, myTopHits.glycerophospholipid)
genestolabel <- c("hdlbpa", "ctsll", "p4hb", "stx8", "CR407584.1", "ptprnb", "ecrg4b", "BX511311.3", "gpr27", "mybphb", "XLOC_036545", "stat2", "nupr1b", "BACZ01084081.1", "plek2", "zgc:162999", "CABZ01084081.1", "si:dkey-61p9.11", "insl5a")
#subset to only include labeled genes
myTopHits.labels.all <- myTopHits.df %>% dplyr::filter(LLgeneAbbrev %in% genestolabel)
#make all points other
myTopHits.df <- myTopHits.df %>% dplyr::mutate(mutation = "other")
#make the plot
hdlbpa_volcano <- ggplot() +
annotate("rect", xmin = 1, xmax = 6, ymin = -log10(0.05), ymax = 20,
alpha=.15, fill="grey") +
annotate("rect", xmin = -1, xmax = -6, ymin = -log10(0.05), ymax = 20,
alpha=.15, fill="grey") +
geom_point(data=myTopHits.df, aes(y=-log10(padjbound), x=log2FoldChange, shape = chromosome, color = mutation, text = paste("Symbol:", LLgeneAbbrev)), size=2) +
geom_point(data=myTopHits.labels, aes(y=-log10(padjbound), x = log2FoldChange, color=category, shape=chromosome), size=2, show.legend = T) +
theme_bw() +
coord_cartesian(xlim = c(-6.5, 6.5), ylim = c(-0.5, 10.5), expand = FALSE) +
ylab("-log10(padj)") +
xlab("log2 fold change") +
geom_label_repel(data=myTopHits.labels.all, aes(x=log2FoldChange, y=-log10(padjbound), label=LLgeneAbbrev), force = 2, nudge_y = -1, size = 2.5, max.overlaps = Inf, show.legend = FALSE, color = "black") + #label selected genes
theme_bw() +
scale_color_manual(values = c("#AADC32FF", "#27AD81FF", "#472D7BFF","gold", "darkgrey", "deepskyblue", "orange", "#D697FF"), name = "pathway") +
scale_shape_manual(values=c(4, 16))
hdlbpa_volcanoGet a list of the top 300 genes dysregulated in hdlbpa by padj to put into DAVID.
#select hdlbpa genes that aren't on chromosome 6
hdlbpa_genesfordavid <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "hdlbpa" & chromosomenumber != 6)
#sort by pvalue
hdlbpa_genesfordavid <- hdlbpa_genesfordavid %>% dplyr::arrange(padj)
#get names of top 300 genes
hdlbpa_genesfordavid <- hdlbpa_genesfordavid$LLgeneAbbrev[1:300]
#write.csv(hdlbpa_genesfordavid, "kmt5b_output/hdlbpa_genesfordavid.csv")
# use the 'gost' function from the gprofiler2 package to run GO enrichment analysis
#genes shared between comparisons - both up and downregulated
gost.res.homwtall <- gost(hdlbpa_genesfordavid, organism = "drerio", correction_method = "fdr", significant = TRUE, evcodes = TRUE, ordered_query = TRUE)
#export to csv
#write.csv(apply(gost.res.homwtall$result,2,as.character), file="kmt5b_output/hdlbpa_GO-analysis.csv")
# produce a manhattan plot of enriched GO terms
gostplot(gost.res.homwtall, interactive = T, capped = T) # choose a specific msigdb collection/subcollection
dr_gsea_c5 <- msigdbr(species = "Danio rerio", category = "C5") %>% # choose msigdb collection of interest
dplyr::select(gs_name, gene_symbol) #just get the columns corresponding to signature name and gene symbols of genes in each signature
# pull out data for hdlbpa
GSEAgenes <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "hdlbpa" & chromosomenumber != 6)
mydata.df.sub <- dplyr::select(GSEAgenes, LLgeneAbbrev, log2FoldChange)
#get rid of duplicates
mydata.df.sub <- mydata.df.sub %>% dplyr::filter(LLgeneAbbrev %in% mydata.df.sub$LLgeneAbbrev[!duplicated(mydata.df.sub$LLgeneAbbrev)])
# construct a named vector
mydata.gsea <- mydata.df.sub$log2FoldChange
names(mydata.gsea) <- as.character(mydata.df.sub$LLgeneAbbrev)
mydata.gsea <- sort(mydata.gsea, decreasing = TRUE)
# run GSEA with C5 in kmt5b
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=dr_gsea_c5, verbose=FALSE, seed=TRUE)
#convert to DF
myGSEA.df <- as_tibble(myGSEA.res)
#look at top terms
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)#import data
myGSEA.df <- read.csv("kmt5b_output/hdlbpa-GSEA-c5.csv")[,2:12]
#list of terms to include in network
networkterms <- c("GOCC_CHROMOSOME_CENTROMERIC_REGION", "GOBP_MITOTIC_NUCLEAR_DIVISION", "GOCC_APICAL_PLASMA_MEMBRANE", "GOBP_MITOTIC_SISTER_CHROMATID_SEGREGATION ", "GOCC_CONDENSED_CHROMOSOME", "GOCC_CONDENSED_CHROMOSOME_CENTROMERIC_REGION", "GOBP_CELL_DIVISION", "GOMF_CARBOHYDRATE_TRANSMEMBRANE_TRANSPORTER_ACTIVITY", "GOMF_SUGAR_TRANSMEMBRANE_TRANSPORTER_ACTIVITY")
myGSEA.res.filter <- myGSEA.res
myGSEA.res.filter@result <- myGSEA.res.filter@result %>% dplyr::filter(ID %in% networkterms)
#make network plot
myGSEA.res.filter <- pairwise_termsim(myGSEA.res.filter)
hdlbpanetwork <- emapplot(myGSEA.res.filter, color="NES", categorySize="p.adjust", showCategory=length(networkterms))
#get data of out network plot
hdlbpanetwork <- ggplot_build(hdlbpanetwork)
networkdata <- hdlbpanetwork$plot$data
hdlbpa_c5_network <- ggraph(networkdata) +
geom_edge_link(alpha=.8, aes_(width=~I(width)), colour='darkgrey') +
geom_node_point(aes(colour = color, size=size)) +
geom_node_text(aes(label=name), repel=TRUE) +
theme_void() +
scale_color_gradientn(colors = myheatcolors3, limit=c(-3.5,3.5), name="NES")
hdlbpa_c5_network#export 700x500
#make heatmap
p3 <- heatplot(myGSEA.res, foldChange=mydata.gsea, showCategory=749) + ggplot2::coord_flip()
#get data of out heatmap
p3 <- ggplot_build(p3)
heatmapdata <- p3$plot$data
#filter to only include fold changes > 0.5
genestoinclude <- heatmapdata %>% dplyr::filter(foldChange > 0.5 | foldChange < -0.5)
heatmapdata <- heatmapdata %>% dplyr::filter(Gene %in% genestoinclude$Gene)
#choose which clusters to keep in heatmap
heatmapdata <- heatmapdata %>% dplyr::filter(categoryID %in% c("GOCC_CHROMOSOME_CENTROMERIC_REGION", "GOBP_MITOTIC_NUCLEAR_DIVISION", "GOCC_APICAL_PLASMA_MEMBRANE", "GOBP_MITOTIC_SISTER_CHROMATID_SEGREGATION ", "GOCC_CONDENSED_CHROMOSOME", "GOCC_CONDENSED_CHROMOSOME_CENTROMERIC_REGION", "GOBP_CELL_DIVISION", "GOMF_CARBOHYDRATE_TRANSMEMBRANE_TRANSPORTER_ACTIVITY", "GOMF_SUGAR_TRANSMEMBRANE_TRANSPORTER_ACTIVITY"))
#use pivot wider to make untidy table
heatmapdata <- heatmapdata %>% pivot_wider(names_from = categoryID, values_from = foldChange, values_fill=NA)
#make matrix
heatmapmatrix <- as.matrix(heatmapdata[,2:9])
rownames(heatmapmatrix) <- heatmapdata$Gene
#turn matrix into 1s and 0s and cluster
heatmapmatrix<-ifelse(abs(heatmapmatrix) > 0,1,0)
heatmapmatrix[is.na(heatmapmatrix)] = 0
#cluster rows by pearson correlation
hc <- hclust(as.dist(1-cor(heatmapmatrix, method="spearman")), method="average") #cluster columns by spearman correlation
#turn heatmap data back into tidy table
heatmapdata <- heatmapdata %>% pivot_longer(!Gene, names_to = "categoryID", values_to = "foldChange")
#change order of data to be plotted
heatmapdata$categoryID <- factor(heatmapdata$categoryID, levels = hc$labels)
heatmapdata$Gene <- factor(heatmapdata$Gene, levels = unique(heatmapdata$Gene))
#rename dataframe columns
colnames(heatmapdata) <- c("Gene", "categoryID", "log2foldchange")
hdlbpa_c5_heatmap <- ggplot(heatmapdata, aes(x = Gene, y = categoryID, fill = log2foldchange)) +
geom_tile(color="black") +
theme_classic() +
scale_fill_continuous_divergingx(palette = 'RdBu', mid = 0, na.value="white") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), axis.title.x=element_blank(), axis.title.y=element_blank())
hdlbpa_c5_heatmap + ggplot2::coord_flip()# Pull out just the columns corresponding to gene symbols and LogFC for at least one pairwise comparison for the enrichment analysis
GSEAgenes <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "hdlbpa" & chromosomenumber != 6)
mydata.df.sub <- dplyr::select(GSEAgenes, LLgeneAbbrev, log2FoldChange)
#get rid of duplicates
mydata.df.sub <- mydata.df.sub %>% dplyr::filter(LLgeneAbbrev %in% mydata.df.sub$LLgeneAbbrev[!duplicated(mydata.df.sub$LLgeneAbbrev)])
# construct a named vector
mydata.gsea <- mydata.df.sub$log2FoldChange
names(mydata.gsea) <- as.character(mydata.df.sub$LLgeneAbbrev)
mydata.gsea <- sort(mydata.gsea, decreasing = TRUE)
#options for doing GSEA using ZFA
#CNStermsGSEA
#headtermsGSEA
#fullsetGSEA
# run GSEA with CNS terms
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=CNStermsGSEA, verbose=FALSE, seed=TRUE, minGSSize = 80, pvalueCutoff = 1)
myGSEA.df <- as_tibble(myGSEA.res@result)
#export file
#write.csv(myGSEA.df, file="kmt5b_output/hdlbpa_CNS_GSEA.csv")
# view results as an interactive table
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)Head terms with positive enrichment scores are lens, eye photoreceptor cell, and ciliary marginal zone. No terms are significant for CNS terms.
# Pull out just the columns corresponding to gene symbols and LogFC
GSEAgenes <- kmt5b_hdlbpa %>% dplyr::filter(mutation == "hdlbpa" & chromosomenumber != 6)
mydata.df.sub <- dplyr::select(GSEAgenes, LLgeneAbbrev, log2FoldChange)
#get rid of duplicates
mydata.df.sub <- mydata.df.sub %>% dplyr::filter(LLgeneAbbrev %in% mydata.df.sub$LLgeneAbbrev[!duplicated(mydata.df.sub$LLgeneAbbrev)])
# construct a named vector
mydata.gsea <- mydata.df.sub$log2FoldChange
names(mydata.gsea) <- as.character(mydata.df.sub$LLgeneAbbrev)
mydata.gsea <- sort(mydata.gsea, decreasing = TRUE)
# run GSEA with single cell markers in 6dpf 23bp
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=singlecellmarkers, verbose=FALSE, seed=TRUE, minGSSize = 80)
myGSEA.df <- as_tibble(myGSEA.res@result)
#export
#write.csv(myGSEA.df, file="kmt5b_output/hdlbpa-singlecellGSEA.csv")
#import
myGSEA.df <- read.csv(file="kmt5b_output/hdlbpa-singlecellGSEA.csv")[,2:12]
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)#make network plot
myGSEA.res <- pairwise_termsim(myGSEA.res)
hdlbpanetwork <- emapplot(myGSEA.res, color="NES", categorySize="p.adjust", showCategory = 11)
#get data of out network plot
hdlbpanetwork <- ggplot_build(hdlbpanetwork)
networkdata <- hdlbpanetwork$plot$data
#make network plot
hdlbpa_singlecell_network <- ggraph(networkdata) +
geom_edge_link(alpha=.8, aes_(width=~I(width)), colour='darkgrey') +
geom_node_point(aes(colour = color, size=size)) +
geom_node_text(aes(label=name), repel=TRUE) +
theme_void() +
scale_color_gradientn(colors = myheatcolors3, limit=c(-3,3), name="NES")
hdlbpa_singlecell_network#export 700x500
#repeat 6dpf GSEA but with neuronal clusters only
nonneuronalclusters <- c("#N/A", "cartilage #2", "cartilage #1", "cartilage #3", "cornea", "cranial ganglion", "cranial ganglion (vagal) #1", "cranial ganglion (vagal) #2", "dermal bone", "epidermis (progenitors)", "epithelium (cornea, progenitors)", "erythrocytes", "eye (cornea), cartilage", "eye (cornea, epithelium)", "eye (non retina)", "glia (eye)", "lens #1", "lens #2", "muscle", "neural crest derived (pigment/xanthophore)", "neutrophils", "otic vesicle", "otic/lateral line", "pigment cell (iridophore)", "retina (amacrine, gabaergic)", "retina (cone bipolar cells)", "retina (cones)", "retina (muller glia)", "retina (photoreceptor precursor cells)", "retina (RGC)", "retina (rods)", "retina (RPE differentiation)", "retina (RPE)", "retina neurons (horizontal?)", "rostral blood island (myeloid) #1", "rostral blood island (myeloid) #2", "pharangeal arch/pectoral fin (mesoderm)")
#make GSEA set for CNS single cell data
singlecellmarkers_CNS <- singlecellmarkers %>% dplyr::filter(! cluster.description %in% nonneuronalclusters)
# run GSEA with single cell markers in 6dpf 23bp
myGSEA.res <- GSEA(mydata.gsea, TERM2GENE=singlecellmarkers_CNS, verbose=FALSE, seed=TRUE, minGSSize = 80)
myGSEA.df <- as_tibble(myGSEA.res@result)
#export
#write.csv(myGSEA.df, file="kmt5b_output/hdlbpa-singlecellGSEA-CNS.csv")
#import
myGSEA.df <- read.csv(file="kmt5b_output/hdlbpa-singlecellGSEA-CNS.csv")[,2:12]
#view data table
datatable(myGSEA.df,
extensions = c('KeyTable', "FixedHeader"),
options = list(keys = TRUE, searchHighlight = TRUE, pageLength = 10, lengthMenu = c("10", "25", "50", "100"))) %>%
formatRound(columns=c(2:10), digits=2)#make network plot
myGSEA.res <- pairwise_termsim(myGSEA.res)
hdlbpanetwork <- emapplot(myGSEA.res, color="NES", categorySize="p.adjust", showCategory = 7)
#get data of out network plot
hdlbpanetwork <- ggplot_build(hdlbpanetwork)
networkdata <- hdlbpanetwork$plot$data
#make new network plot
hdlbpa_singlecellCNS_network <- ggraph(networkdata) +
geom_edge_link(alpha=.8, aes_(width=~I(width)), colour='darkgrey') +
geom_node_point(aes(colour = color, size=size)) +
geom_node_text(aes(label=name), repel=TRUE) +
theme_void() +
scale_color_gradientn(colors = myheatcolors3, limit=c(-3,3), name="NES")
hdlbpa_singlecellCNS_network#export 700x500
#make heatmap
p3 <- heatplot(myGSEA.res, foldChange=mydata.gsea, showCategory = 7) + ggplot2::coord_flip()
#get data of out heatmap
p3 <- ggplot_build(p3)
heatmapdata <- p3$plot$data
#filter to only include fold changes > 0.5
genestoinclude <- heatmapdata %>% dplyr::filter(foldChange > 0.5 | foldChange < -0.5)
heatmapdata <- heatmapdata %>% dplyr::filter(Gene %in% genestoinclude$Gene)
#use pivot wider to make untidy table
heatmapdata <- heatmapdata %>% pivot_wider(names_from = categoryID, values_from = foldChange, values_fill=NA)
#make matrix
heatmapmatrix <- as.matrix(heatmapdata[,2:8])
rownames(heatmapmatrix) <- heatmapdata$Gene
#turn matrix into 1s and 0s and cluster
heatmapmatrix<-ifelse(abs(heatmapmatrix) > 0,1,0)
heatmapmatrix[is.na(heatmapmatrix)] = 0
#cluster your selected genes
hr <- hclust(as.dist(1-cor(t(heatmapmatrix), method="pearson")), method="complete") #cluster rows by pearson correlation
hc <- hclust(as.dist(1-cor(heatmapmatrix, method="spearman")), method="average") #cluster columns by spearman correlation
#turn heatmap data back into tidy table
heatmapdata <- heatmapdata %>% pivot_longer(!Gene, names_to = "categoryID", values_to = "foldChange")
#change order of data to be plotted
heatmapdata$categoryID <- factor(heatmapdata$categoryID, levels = hc$labels)
heatmapdata$Gene <- factor(heatmapdata$Gene, levels = unique(heatmapdata$Gene))
#rename dataframe columns
colnames(heatmapdata) <- c("Gene", "categoryID", "log2foldchange")
singlecellheatmap <- ggplot(heatmapdata, aes(x = Gene, y = categoryID, fill = log2foldchange)) +
geom_tile(color="black") +
theme_classic() +
scale_fill_continuous_divergingx(palette = 'RdBu', mid = 0, na.value="white") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), axis.title.x=element_blank(), axis.title.y=element_blank())
singlecellheatmap + ggplot2::coord_flip()Packages and versions necessary to reproduce the results in this report.
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] eulerr_7.0.0
## [2] svglite_2.1.2
## [3] ggraph_2.1.0
## [4] colorspace_2.1-0
## [5] ggpubr_0.6.0
## [6] lattice_0.22-5
## [7] org.Dr.eg.db_3.18.0
## [8] ensembldb_2.26.0
## [9] AnnotationFilter_1.26.0
## [10] GenomicFeatures_1.54.1
## [11] ChIPseeker_1.38.0
## [12] orthogene_1.8.0
## [13] plotly_4.10.3
## [14] BaseSet_0.9.0
## [15] ontologyIndex_2.11
## [16] enrichplot_1.22.0
## [17] msigdbr_7.5.1
## [18] clusterProfiler_4.10.0
## [19] gprofiler2_0.2.2
## [20] GSVA_1.50.0
## [21] GSEABase_1.64.0
## [22] graph_1.80.0
## [23] annotate_1.80.0
## [24] XML_3.99-0.14
## [25] AnnotationDbi_1.64.0
## [26] DT_0.30
## [27] RColorBrewer_1.1-3
## [28] ggrepel_0.9.4
## [29] scales_1.2.1
## [30] viridis_0.6.4
## [31] viridisLite_0.4.2
## [32] cowplot_1.1.1
## [33] DESeq2_1.42.0
## [34] SummarizedExperiment_1.32.0
## [35] Biobase_2.62.0
## [36] MatrixGenerics_1.14.0
## [37] matrixStats_1.1.0
## [38] GenomicRanges_1.54.1
## [39] GenomeInfoDb_1.38.0
## [40] IRanges_2.36.0
## [41] S4Vectors_0.40.1
## [42] BiocGenerics_0.48.0
## [43] lubridate_1.9.3
## [44] forcats_1.0.0
## [45] stringr_1.5.0
## [46] dplyr_1.1.3
## [47] purrr_1.0.2
## [48] readr_2.1.4
## [49] tidyr_1.3.0
## [50] tibble_3.2.1
## [51] ggplot2_3.4.4
## [52] tidyverse_2.0.0
## [53] knitr_1.45
## [54] tinytex_0.48
## [55] rmarkdown_2.25
## [56] TxDb.Hsapiens.UCSC.hg38.knownGene_3.18.0
## [57] EnsDb.Hsapiens.v75_2.99.0
##
## loaded via a namespace (and not attached):
## [1] ProtGenerics_1.34.0
## [2] fs_1.6.3
## [3] bitops_1.0-7
## [4] HDO.db_0.99.1
## [5] httr_1.4.7
## [6] tools_4.3.1
## [7] backports_1.4.1
## [8] utf8_1.2.4
## [9] R6_2.5.1
## [10] HDF5Array_1.30.0
## [11] lazyeval_0.2.2
## [12] rhdf5filters_1.14.0
## [13] withr_2.5.2
## [14] prettyunits_1.2.0
## [15] gridExtra_2.3
## [16] cli_3.6.1
## [17] scatterpie_0.2.1
## [18] labeling_0.4.3
## [19] sass_0.4.7
## [20] systemfonts_1.0.5
## [21] Rsamtools_2.18.0
## [22] yulab.utils_0.1.0
## [23] gson_0.1.0
## [24] DOSE_3.28.0
## [25] plotrix_3.8-2
## [26] rstudioapi_0.15.0
## [27] RSQLite_2.3.2
## [28] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [29] generics_0.1.3
## [30] gridGraphics_0.5-1
## [31] BiocIO_1.12.0
## [32] crosstalk_1.2.0
## [33] vroom_1.6.4
## [34] gtools_3.9.4
## [35] car_3.1-2
## [36] homologene_1.4.68.19.3.27
## [37] GO.db_3.18.0
## [38] Matrix_1.6-1.1
## [39] fansi_1.0.5
## [40] abind_1.4-5
## [41] lifecycle_1.0.4
## [42] yaml_2.3.7
## [43] carData_3.0-5
## [44] gplots_3.1.3
## [45] rhdf5_2.46.0
## [46] qvalue_2.34.0
## [47] SparseArray_1.2.0
## [48] BiocFileCache_2.10.1
## [49] grid_4.3.1
## [50] blob_1.2.4
## [51] promises_1.2.1
## [52] crayon_1.5.2
## [53] beachmat_2.18.0
## [54] KEGGREST_1.42.0
## [55] pillar_1.9.0
## [56] fgsea_1.28.0
## [57] boot_1.3-28.1
## [58] rjson_0.2.21
## [59] codetools_0.2-19
## [60] fastmatch_1.1-4
## [61] glue_1.6.2
## [62] ggfun_0.1.3
## [63] data.table_1.14.8
## [64] vctrs_0.6.4
## [65] png_0.1-8
## [66] treeio_1.26.0
## [67] gtable_0.3.4
## [68] cachem_1.0.8
## [69] xfun_0.41
## [70] S4Arrays_1.2.0
## [71] mime_0.12
## [72] tidygraph_1.2.3
## [73] SingleCellExperiment_1.24.0
## [74] interactiveDisplayBase_1.40.0
## [75] ellipsis_0.3.2
## [76] nlme_3.1-163
## [77] ggtree_3.10.0
## [78] bit64_4.0.5
## [79] progress_1.2.2
## [80] filelock_1.0.2
## [81] bslib_0.5.1
## [82] irlba_2.3.5.1
## [83] KernSmooth_2.23-22
## [84] DBI_1.1.3
## [85] tidyselect_1.2.0
## [86] bit_4.0.5
## [87] compiler_4.3.1
## [88] curl_5.1.0
## [89] xml2_1.3.5
## [90] DelayedArray_0.28.0
## [91] shadowtext_0.1.2
## [92] rtracklayer_1.62.0
## [93] caTools_1.18.2
## [94] rappdirs_0.3.3
## [95] digest_0.6.33
## [96] XVector_0.42.0
## [97] htmltools_0.5.6.1
## [98] pkgconfig_2.0.3
## [99] sparseMatrixStats_1.14.0
## [100] highr_0.10
## [101] dbplyr_2.4.0
## [102] fastmap_1.1.1
## [103] rlang_1.1.1
## [104] htmlwidgets_1.6.2
## [105] shiny_1.7.5.1
## [106] DelayedMatrixStats_1.24.0
## [107] farver_2.1.1
## [108] jquerylib_0.1.4
## [109] jsonlite_1.8.7
## [110] BiocParallel_1.36.0
## [111] GOSemSim_2.28.0
## [112] BiocSingular_1.18.0
## [113] RCurl_1.98-1.12
## [114] magrittr_2.0.3
## [115] GenomeInfoDbData_1.2.11
## [116] ggplotify_0.1.2
## [117] patchwork_1.1.3
## [118] Rhdf5lib_1.24.0
## [119] munsell_0.5.0
## [120] Rcpp_1.0.11
## [121] ggnewscale_0.4.9
## [122] ape_5.7-1
## [123] babelgene_22.9
## [124] stringi_1.8.1
## [125] zlibbioc_1.48.0
## [126] MASS_7.3-60
## [127] AnnotationHub_3.10.0
## [128] plyr_1.8.9
## [129] parallel_4.3.1
## [130] HPO.db_0.99.2
## [131] Biostrings_2.70.1
## [132] graphlayouts_1.0.1
## [133] splines_4.3.1
## [134] hms_1.1.3
## [135] polylabelr_0.2.0
## [136] locfit_1.5-9.8
## [137] igraph_1.5.1
## [138] ggsignif_0.6.4
## [139] reshape2_1.4.4
## [140] biomaRt_2.58.0
## [141] ScaledMatrix_1.10.0
## [142] BiocVersion_3.18.0
## [143] evaluate_0.23
## [144] BiocManager_1.30.22
## [145] tzdb_0.4.0
## [146] tweenr_2.0.2
## [147] httpuv_1.6.12
## [148] grr_0.9.5
## [149] polyclip_1.10-6
## [150] ggforce_0.4.1
## [151] rsvd_1.0.5
## [152] broom_1.0.5
## [153] xtable_1.8-4
## [154] restfulr_0.0.15
## [155] tidytree_0.4.5
## [156] MPO.db_0.99.7
## [157] rstatix_0.7.2
## [158] later_1.3.1
## [159] snow_0.4-4
## [160] aplot_0.2.2
## [161] GenomicAlignments_1.38.0
## [162] memoise_2.0.1
## [163] timechange_0.2.0